// Copyright 2019 DeepMind Technologies Ltd. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef OPEN_SPIEL_ALGORITHMS_TABULAR_EXPLOITABILITY_H_
#define OPEN_SPIEL_ALGORITHMS_TABULAR_EXPLOITABILITY_H_

#include <string>
#include <unordered_map>

#include "open_spiel/algorithms/history_tree.h"
#include "open_spiel/policy.h"
#include "open_spiel/spiel.h"
#include "open_spiel/spiel_utils.h"

namespace open_spiel {
namespace algorithms {

// Returns the average utility that a best responder wins when when playing
// against the opponents' policies, each of which are assumed to be contained
// within the specified (joint) policy.
// This only works for zero- or constant-sum sequential games, otherwise raises
// an error.
double Exploitability(const Game& game, const Policy& policy);

// Same function provided for easy Python compatibility.
double Exploitability(
    const Game& game,
    const std::unordered_map<std::string, ActionsAndProbs>& policy);

// Calculates a measure of how far the given policy is from a Nash equilibrium
// by returning the sum of the improvements in the value that each player could
// obtain by unilaterally changing their strategy while the opposing player
// maintains their current strategy (which for a Nash equilibrium, this value
// is 0). This function only works for sequential games. Note: in zero-sum and
// constant-sum games, exploitability is equal to NashConv / (num. of players).
// The use_state_get_policy flag indicates whether to call
// Policy::GetStatePolicy(const State&) instead of
// Policy::GetStatePolicy(const std::string&) in the computation of the expected
// values of the joint policy.
//
// Note: these functions make use of TabularBestResponse, which assumes perfect
// recall games, and uses the game's State::ToString as a unique identifier for
// representing the value of the state.
double NashConv(const Game& game, const Policy& policy,
                bool use_state_get_policy);

// Same as above with use_state_get_policy set to false.
double NashConv(const Game& game, const Policy& policy);

// Same function provided for easy Python compatibility.
double NashConv(const Game& game,
                const std::unordered_map<std::string, ActionsAndProbs>& policy);

}  // namespace algorithms
}  // namespace open_spiel

#endif  // OPEN_SPIEL_ALGORITHMS_TABULAR_EXPLOITABILITY_H_
